import logging
import os
import re
import chardet

from oecp.executor.base import CompareExecutor, CPM_CATEGORY_DIFF
from oecp.proxy.rpm_proxy import RPMProxy
from oecp.result.compare_result import CMP_RESULT_SAME, CompareResultComposite, CMP_TYPE_RPM, CMP_TYPE_RPM_CONFIG, \
    CompareResultComponent, CMP_RESULT_DIFF, CMP_RESULT_EXCEPTION
from oecp.utils.shell import shell_cmd

logger = logging.getLogger('oecp')


class PlainCompareExecutor(CompareExecutor):

    def __init__(self, dump_a, dump_b, config):
        """
        run both dumpers and prepare the settings used while comparing
        Args:
            dump_a: dumper of side A, the value of its run() is kept in self.dump_a
            dump_b: dumper of side B, the value of its run() is kept in self.dump_b
            config: executor config, forwarded to the base class
        """
        super().__init__(dump_a, dump_b, config)
        # key of the file list inside one dump entry
        self.data = 'data'
        # separator used to cut a dumped file name, see _split_common_files
        self.split_flag = '__rpm__'
        self.dump_a = dump_a.run()
        self.dump_b = dump_b.run()
        # NOTE(review): self.config is presumably set by the base class __init__ - confirm
        extract_section = self.config.get('extract', {})
        self._work_dir = extract_section.get('work_dir', '/tmp/oecp')

    def _split_common_files(self, files_a, files_b):
        common_file_pairs = []
        for file_a in files_a:
            for file_b in files_b:
                if file_a.split(self.split_flag)[-1] == file_b.split(self.split_flag)[-1]:
                    common_file_pairs.append([file_a, file_b])
        return common_file_pairs

    @staticmethod
    def _save_diff_result(file_path, content):
        with open(file_path, "w") as f:
            f.write(content)

    def _get_file_encoding_format(self, file_path):
        """
        detect the encoding of a file with chardet
        Args:
            file_path: path of the file, it is read completely in binary mode

        Returns:
            file_type: value of the 'encoding' key reported by chardet.detect
        """
        with open(file_path, "rb") as stream:
            raw_bytes = stream.read()
        # the detection only needs the bytes, so it runs after the file is closed
        detection = chardet.detect(raw_bytes)
        return detection['encoding']

    def _exclude_comments(self, file_path):
        """
        remove the comments from the header file and
        save it in the original file format
        Args:
            file_path: file_path

        Returns:
            None
        """
        try:
            file_format = self._get_file_encoding_format(file_path)
            with open(file_path, "r", encoding=file_format,
                      errors='ignore') as file, open("%s.bak" % file_path,
                                                     "w",
                                                     encoding=file_format) as file_bak:
                contents = file.read()
                # use regex to exclude comments in matching header files
                # comment examples /* xxxxxx*/ and //
                new_contents = re.sub("/\*[\s\S]*?\*/|//.*", "", contents)
                file_bak.write(new_contents)
                os.remove(file_path)
                os.rename("%s.bak" % file_path, file_path)
        except (IOError, UnicodeDecodeError, OSError, FileNotFoundError) as error:
            logger.exception(f"file reading exception,{file_path} the format is {error} ")

    def _compare_result(self, dump_a, dump_b, single_result=CMP_RESULT_SAME):
        """
        compare the common plain files of two rpm dumps with the diff command
        Args:
            dump_a: dump entry of side A, the keys 'category', 'kind', 'rpm'
                and self.data (file list) are read
            dump_b: dump entry of side B, the keys 'category', 'rpm' and
                self.data (file list) are read
            single_result: initial result of the returned composite

        Returns:
            result: CompareResultComposite with one component per common file
            pair, the diff text of differing files is saved below
            <work_dir>/<kind>/<rpm_a>__diff__<rpm_b>/
        """
        if dump_a['category'] == dump_b['category']:
            category = dump_a['category']
        else:
            category = CPM_CATEGORY_DIFF
        kind = dump_a['kind']
        result = CompareResultComposite(CMP_TYPE_RPM, single_result, dump_a['rpm'], dump_b['rpm'], category)
        verbose_diff_path = f'{dump_a["rpm"]}__diff__{dump_b["rpm"]}'
        common_file_pairs = self._split_common_files(dump_a[self.data], dump_b[self.data])
        base_dir = os.path.join(self._work_dir, kind, verbose_diff_path)
        for file_a, file_b in common_file_pairs:
            diff_options = "-uN"
            if file_a.endswith(".h") and file_b.endswith(".h"):
                # call the self._exclude_comments function to remove
                # the comments in the header file
                logger.info("remove the comments in the header file")
                self._exclude_comments(file_a)
                self._exclude_comments(file_b)
                # use the diff command to compare
                # parameter -B: do not check blank lines
                # parameter -H: When the file is relatively large, the speed can be increased;
                diff_options = "-uBHN"

            # the argument list is built directly, formatting a string and
            # splitting it again would break paths that contain whitespace
            ret, out, err = shell_cmd(["diff", diff_options, file_a, file_b])
            base_a = os.path.basename(file_a)
            base_b = os.path.basename(file_b)
            if ret and out:
                try:
                    # replace the file names in the diff header; only the
                    # first line that starts with "---" / "+++" is touched, so
                    # content lines that look similar stay intact, and the
                    # callable keeps backslashes of a path literal
                    out = re.sub(r"^---[ \t]+[^\t\n]+\t",
                                 lambda _: "--- {} ".format(file_a), out, count=1, flags=re.M)
                    out = re.sub(r"^\+\+\+[ \t]+[^\t\n]+\t",
                                 lambda _: "+++ {} ".format(file_b), out, count=1, flags=re.M)
                    os.makedirs(base_dir, exist_ok=True)
                    # NOTE(review): only base names are used here, files with the
                    # same base name in different directories share one result file
                    file_path = os.path.join(base_dir,
                                             "%s__diff__%s" % (base_a, base_b))
                    self._save_diff_result(file_path, out)
                    logger.info("plain files are diff")
                    data = CompareResultComponent(
                        CMP_TYPE_RPM_CONFIG, CMP_RESULT_DIFF, base_a, base_b, file_path)
                    result.set_cmp_result(CMP_RESULT_DIFF)
                except (OSError, UnicodeError):
                    logger.exception("save compare result exception")
                    data = CompareResultComponent(
                        CMP_TYPE_RPM_CONFIG, CMP_RESULT_EXCEPTION, base_a, base_b)
            elif isinstance(ret, int) and ret > 1:
                # diff exits with a status above 1 when it ran into trouble,
                # such a pair must not be reported as identical
                logger.error("diff failed on %s and %s, ret: %s, err: %s", file_a, file_b, ret, err)
                data = CompareResultComponent(
                    CMP_TYPE_RPM_CONFIG, CMP_RESULT_EXCEPTION, base_a, base_b)
            else:
                data = CompareResultComponent(
                    CMP_TYPE_RPM_CONFIG, CMP_RESULT_SAME, base_a, base_b)
            result.add_component(data)
        return result

    def compare(self):
        """
        compare every pair of dump entries whose rpm names are equal

        Returns:
            list with one result of self._compare_result per matching pair,
            ordered by self.dump_a first and by self.dump_b second
        """
        # only entries with the same rpm name are compared
        return [
            self._compare_result(entry_a, entry_b)
            for entry_a in self.dump_a
            for entry_b in self.dump_b
            if RPMProxy.rpm_name(entry_a['rpm']) == RPMProxy.rpm_name(entry_b['rpm'])
        ]

    def run(self):
        result = self.compare()
        return result
